The file contains the source code of the data applications of the trading strategies presented in Dynamic Data Science Applications in Optimal Profit Algorithmic Trading.
pkg_list = c('quantmod', 'TTR', 'zoo', 'tseries', 'fGarch','PEIP','gdata',
'gridExtra','tidyverse', 'aTSA', 'dygraphs', 'urca')
# Function to install required packages if needed
for (pkg in pkg_list)
{
# Loading the library.
if (!library(pkg, logical.return=TRUE, character.only=TRUE))
{
# If the library cannot be loaded, install first and then load.
install.packages(pkg)
library(pkg, character.only=TRUE)
}
}
start.date = '2017-2-1' # starting date of stock
end.date = '2019-3-17' # ending date of stock
# Download the selected stocks from Yahoo finance
getSymbols(c('SPY','ADBE','EBAY','MSFT','IBM','GLD','GDX','EWA','EWC','IGE'),
src = "yahoo", from = start.date, to = end.date)
## [1] "SPY" "ADBE" "EBAY" "MSFT" "IBM" "GLD" "GDX" "EWA" "EWC" "IGE"
stocks <-merge(SPY = SPY[, "SPY.Adjusted"], ADBE = ADBE[, "ADBE.Adjusted"],
EBAY= EBAY[, "EBAY.Adjusted"], MSFT = MSFT[, "MSFT.Adjusted"],
IBM = IBM[, "IBM.Adjusted"], GLD = GLD[, "GLD.Adjusted"],
GDX = GDX[, "GDX.Adjusted"], EWA = EWA[, "EWA.Adjusted"],
EWC = EWC[, "EWC.Adjusted"], IGE = IGE[, "IGE.Adjusted"])
head(stocks)
## SPY.Adjusted ADBE.Adjusted EBAY.Adjusted MSFT.Adjusted
## 2017-02-01 213.7879 113.36 31.55644 60.07822
## 2017-02-02 213.9288 113.16 31.35050 59.69079
## 2017-02-03 215.4034 115.17 31.44856 60.17270
## 2017-02-06 215.0183 114.46 31.40934 60.13491
## 2017-02-07 215.0277 114.96 31.80159 59.93647
## 2017-02-08 215.3095 116.13 32.60570 59.85144
## IBM.Adjusted GLD.Adjusted GDX.Adjusted EWA.Adjusted
## 2017-02-01 151.5919 115.20 23.36890 18.50433
## 2017-02-02 151.8442 115.84 23.88821 18.69501
## 2017-02-03 152.9227 116.13 24.00579 18.71234
## 2017-02-06 152.9575 117.70 24.88764 18.53900
## 2017-02-07 155.2189 117.46 24.77986 18.49566
## 2017-02-08 154.4387 118.19 25.05421 18.59100
## EWC.Adjusted IGE.Adjusted
## 2017-02-01 25.32722 31.71767
## 2017-02-02 25.39264 31.88734
## 2017-02-03 25.48610 32.13736
## 2017-02-06 25.30852 31.85162
## 2017-02-07 25.23376 31.38728
## 2017-02-08 25.32722 31.51230
find_cointegrated_pairs <- function (data){
n <- ncol (data)
pvalue_matrix <- matrix(0, nrow=n, ncol=n)
pairs <- list()
m <- 1
for (i in 1:n){
for (j in 1:n){
if(i>=j) {
next;
} else{
S1 <- data[, i]
S2 <- data[, j]
result <- coint.test (as.numeric(S1), as.numeric(S2), output = FALSE)
pvalue_matrix [i, j] <- result[,3][[1]]
if (result[,3][[1]] < 0.05){
pairs [[m]] <- c (i, j)
m <- m+1}
}
}
}
newlist <- list(pvalue_matrix, pairs)
return (newlist)
}
pvalue<-find_cointegrated_pairs(stocks)
#round(pvalue[[1]],3)
assets <- c("EWA", "EWC") # selected two assets
pair.stock <- merge(stocks[, 8], stocks[, 9], join="inner")
colnames(pair.stock) <- assets
# Plot the assets
plot(pair.stock, legend.loc=1)
# Test of multiple cointegration
jotest=ca.jo(pair.stock, type="trace", K=2, ecdet="none", spec="longrun")
summary(jotest)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , with linear trend
##
## Eigenvalues (lambda):
## [1] 0.033228572 0.006799466
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 1 | 3.62 6.50 8.18 11.65
## r = 0 | 21.57 15.66 17.95 23.52
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## EWA.l2 EWC.l2
## EWA.l2 1.0000000 1.000000
## EWC.l2 -0.5443606 -2.904187
##
## Weights W:
## (This is the loading matrix)
##
## EWA.l2 EWC.l2
## EWA.d -0.072572442 0.003358185
## EWC.d -0.009786007 0.005780384
# select pairwise cointegrated stocks
x <- pair.stock[, 1]
y <- pair.stock[, 2]
x$intercept <- rep(1, nrow(x)) # create intercept
var_e <- 0.0001 # innovation covariance of observation
sigma_v <- var_e/(1-var_e)*diag(2) #covariance matrix of state
Ve <- 0.001
P_t <- matrix(rep(0, 4), nrow=2)
P <- matrix(rep(0, 4), nrow=2)
beta <- matrix(rep(0, nrow(y)*2), ncol=2)
y_fitted <- rep(0, nrow(y))
nu <- rep(0, nrow(y))
Q <- rep(0, nrow(y))
################################################################
# Function to implements the iterative Non-Gaussian filter operations
################################################################
kalman_iteration <- function(y, x) {
for(i in 1:nrow(y)) {
if(i > 1) {
beta[i, ] <- beta[i-1, ] # state transition
P_t <- P + sigma_v # state covariance prediction
}
y_fitted[i] <- x[i, ] %*% beta[i, ] # observation prediction
Q[i] <- x[i, ] %*% P_t %*% t(x[i, ]) + Ve # observation variance prediction
nu[i] <- y[i] - y_fitted[i] # prediction error
K_gain <- P_t %*% t(x[i, ]) / Q[i] # information gain
# updating the state
beta[i, ] <- beta[i, ] + K_gain * nu[i]
P <- P_t - K_gain %*% x[i, ] %*% P_t
}
return(list(beta, P, Q, nu))
}
res <- kalman_iteration(y,x) # Implementation of function
# Extract results
beta <- xts(res[[1]], order.by=index(pair.stock))
plot(beta[2:nrow(beta), 1],type='l',main ='Dynamic hedge ratio',col = "blue")
plot(beta[2:nrow(beta), 2],type='l',main ='Dynamic intercept',col = "blue")
# plot trade signals
nu <- xts(res[[4]], order.by=index(pair.stock))
sqrtQ <- xts(sqrt(res[[3]]), order.by=index(pair.stock))
signals <- merge(nu, sqrtQ, -sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
#############################################################
# Required functions to calculate p and profit
#############################################################
# Function to generate positions and calculate profit and loss for multiple stocks
PnL<-function(signals,nu,beta,x,y){
len <- length(index(signals))
vec.sig<-ifelse((signals[1:len]$nu > signals[1:len]$sqrtQ) &
(lag.xts(signals$nu, 1) < lag.xts(signals$sqrtQ, 1)), -1,
ifelse((signals[1:len]$nu < signals[1:len]$negsqrtQ) &
(lag.xts(signals$nu, 1) > lag.xts(signals$negsqrtQ, 1)), 1, 0))
colnames(vec.sig) <- "vectorise.signals"
# getting only the first signals
vec.sig[vec.sig == 0] <- NA # replace 0 by NA
vec.sig <- na.locf(vec.sig) # replace the missing values by last real observations
vec.sig <- diff(vec.sig)/2
# generate positions and calculate profit for two stocks
if(ncol(beta)==2){
sim <- merge(lag.xts(vec.sig,1), beta[, 1], x[, 1], y)
colnames(sim) <- c("sig", "hedge", assets[1], assets[2])
sim$posX <- sim$sig * -1000 * sim$hedge
sim$posY <- sim$sig * 1000
sim$posX[sim$posX == 0] <- NA
sim$posX <- na.locf(sim$posX)
sim$posY[sim$posY == 0] <- NA
sim$posY <- na.locf(sim$posY)
PLX <- sim$posX * diff(sim[, assets[1]])
PLY <- sim$posY * diff(sim[, assets[2]])
profit_loss <- PLX + PLY
}
# generate positions and calculate profit for three stocks
if(ncol(beta)==3){
sim <- merge(lag.xts(vec.sig,1), beta[, 1], beta[, 2], x[, 1], x[, 2], y)
colnames(sim) <- c("sig", "hedge1", "hedge2", assets[1], assets[2], assets[3])
sim$posX1 <- sim$sig * -1000 * sim$hedge1
sim$posX2 <- sim$sig * -1000 * sim$hedge2
sim$posY <- sim$sig * 1000
sim$posX1[sim$posX1 == 0] <- NA
sim$posX1 <- na.locf(sim$posX1)
sim$posX2[sim$posX2 == 0] <- NA
sim$posX2 <- na.locf(sim$posX2)
sim$posY[sim$posY == 0] <- NA
sim$posY <- na.locf(sim$posY)
PLX <- sim$posX1 * diff(sim[, assets[1]]) + sim$posX2 * diff(sim[, assets[2]])
PLY <- sim$posY * diff(sim[, assets[3]])
profit_loss <- PLX + PLY
}
# generate positions and calculate profit for four stocks
if(ncol(beta)==4){
sim <- merge(lag.xts(vec.sig,1), beta[,1], beta[,2], beta[,3], x[,1], x[,2], x[,3], y)
colnames(sim) <- c("sig", "hedge1", "hedge2", "hedge3",assets[1],assets[2],
assets[3], assets[4])
sim$posX1 <- sim$sig * -1000 * sim$hedge1
sim$posX2 <- sim$sig * -1000 * sim$hedge2
sim$posX3 <- sim$sig * -1000 * sim$hedge3
sim$posY <- sim$sig * 1000
sim$posX1[sim$posX1 == 0] <- NA
sim$posX1 <- na.locf(sim$posX1)
sim$posX2[sim$posX2 == 0] <- NA
sim$posX2 <- na.locf(sim$posX2)
sim$posX3[sim$posX3 == 0] <- NA
sim$posX3 <- na.locf(sim$posX3)
sim$posY[sim$posY == 0] <- NA
sim$posY <- na.locf(sim$posY)
PLX <- sim$posX1 * diff(sim[, assets[1]]) + sim$posX2 * diff(sim[, assets[2]]) +
sim$posX3 * diff(sim[, assets[3]])
PLY <- sim$posY * diff(sim[, assets[2]])
profit_loss <- PLX + PLY
}
return(ProfitLoss=profit_loss)
}
# Functions to calculate the optimal value of threshold, $p$
SR.train<-function(nu, sqrtQ, p){
signals <- merge(nu, p*sqrtQ, -p*sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
# Implementation of profit
profit.loss<- PnL(signals,nu,beta,x,y)
st_p <- sqrt(252)*mean(na.omit(profit.loss))/sd(na.omit(profit.loss))
return (st_p)
}
# determining optimal p to maximize Sharpe ratio
p <- seq(0.1, 2, 0.01)
SR<-0
for(j in 1:length(p)){
SR[j] <- SR.train (nu, sqrtQ, p[j])
}
max(na.omit(SR))
## [1] 1.515397
plot(p, SR, type = "l", col = "blue")
## Calcualte the optimal value of p by maximizing the SR
p.opt<-p[which.max(SR)]
p.opt
## [1] 1.48
# create optimal trading signals
p <- p.opt
signals <- merge(nu, p*sqrtQ, -p*sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
plot(signals[3:length(index(signals))], ylab='nu', main = 'Trading signals',
col=c('blue', 'red', 'red'), lwd=c(1,2,2))
## Calculate the cumulative profit using optimal trading signals
# Implementation of profit and loss fucntion to calculate cumulative profit
profit.loss<- PnL(signals,nu,beta,x,y)
sum (na.omit(profit.loss))
## [1] 7565.727
plot(cumsum(na.omit(profit.loss)), main="Cumulative profit, $", col = "blue")
assets <- c("EWA", "EWC", "IGE")
three.stocks <- merge(stocks[, 8], stocks[, 9], stocks [, 10])
colnames(three.stocks) <- assets
plot(three.stocks, legend.loc=1)
jotest=ca.jo(three.stocks, type="trace", K=2, ecdet="none", spec="longrun")
summary(jotest)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , with linear trend
##
## Eigenvalues (lambda):
## [1] 0.038080520 0.011171405 0.009036416
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 2 | 4.82 6.50 8.18 11.65
## r <= 1 | 10.79 15.66 17.95 23.52
## r = 0 | 31.40 28.71 31.52 37.22
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## EWA.l2 EWC.l2 IGE.l2
## EWA.l2 1.00000000 1.0000000 1.000000
## EWC.l2 -0.47601178 -1.2951450 -3.893482
## IGE.l2 -0.01591207 -0.2048727 2.219207
##
## Weights W:
## (This is the loading matrix)
##
## EWA.l2 EWC.l2 IGE.l2
## EWA.d -0.07058604 0.005843659 -0.003161974
## EWC.d -0.01404794 0.014292636 -0.003131786
## IGE.d 0.01849513 0.011053309 -0.009246670
x <- three.stocks[, 1:2]
y <- three.stocks[, 3]
x$intercept <- rep(1, nrow(x)) # creating intercept
var_e <- 0.0001 # innovation covariance of observations
sigma_v <- var_e/(1-var_e)*diag(3) # covariance matrix of state
#sigma_v
Ve <- 0.001
P_t <- matrix(rep(0, 9), nrow=3)
P <- matrix(rep(0, 9), nrow=3)
beta <- matrix(rep(0, nrow(y)*3), ncol=3)
y_fitted <- rep(0, nrow(y))
nu <- rep(0, nrow(y))
Q <- rep(0, nrow(y))
# Implementation of function of iterative Non-Gaussian filter operations
res <- kalman_iteration(y,x)
# Extract results
beta <- xts(res[[1]], order.by=index(three.stocks))
plot(beta[2:nrow(beta), 1:2], type='l', main = 'Dynamic hedge ratios',
col = c("blue", "green"))
plot(beta[2:nrow(beta), 3], type='l', main = 'Dynamic updated intercept', col = "blue")
# plot trade signals
nu <- xts(res[[4]], order.by=index(three.stocks))
sqrtQ <- xts(sqrt(res[[3]]), order.by=index(three.stocks))
signals <- merge(nu, sqrtQ, -sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
# determining optimal p to maximize Sharpe ratio
p <- seq(0.1, 2, 0.01)
SR<-0
for(j in 1:length(p)){
SR[j] <- SR.train (nu, sqrtQ, p[j])
}
max(na.omit(SR))
## [1] 1.581271
plot(p, SR, type = "l", col = "blue", ylab = "Annualized SR")
## Calcualte the optimal value of p by maximizing the SR
p.opt<-p[which.max(SR)]
p.opt
## [1] 1.02
# create optimal trading signals
p <- p.opt
signals <- merge(nu, p*sqrtQ, -p*sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
plot(signals[3:length(index(signals))], ylab='nu', main = 'Trading signals',
col=c('blue', 'red', 'red'), lwd=c(1,2,2))
## Calculate the cumulative profit using optimal trading signals
# Implementation of profit and loss fucntion to calculate cumulative profit
profit.loss<- PnL(signals,nu,beta,x,y)
sum (na.omit(profit.loss))
## [1] 9713.534
plot(cumsum(na.omit(profit.loss)), main="Cumulative profit, $", col = "blue")
assets <- c("GDX", "EWA", "EWC", "IGE")
four.stocks <- merge(stocks[, 7], stocks[, 8], stocks[, 9], stocks [, 10])
colnames(four.stocks) <- assets
plot(four.stocks, legend.loc=1)
jotest=ca.jo(four.stocks, type="trace", K=2, ecdet="none", spec="longrun")
summary(jotest)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , with linear trend
##
## Eigenvalues (lambda):
## [1] 0.045577162 0.021370707 0.011810312 0.009079026
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 3 | 4.84 6.50 8.18 11.65
## r <= 2 | 11.15 15.66 17.95 23.52
## r <= 1 | 22.62 28.71 31.52 37.22
## r = 0 | 47.39 45.23 48.28 55.43
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## GDX.l2 EWA.l2 EWC.l2 IGE.l2
## GDX.l2 1.00000000 1.0000000 1.000000 1.000000
## EWA.l2 -19.53896573 -0.4622338 3.276195 7.277618
## EWC.l2 9.13739689 0.5825642 -2.241764 -20.127084
## IGE.l2 0.02478951 0.2101317 -1.271165 8.400918
##
## Weights W:
## (This is the loading matrix)
##
## GDX.l2 EWA.l2 EWC.l2 IGE.l2
## GDX.d 0.0026561203 -0.0252144712 0.001788170 -0.0005459360
## EWA.d 0.0040943496 0.0023533474 0.002981879 -0.0004305856
## EWC.d 0.0012926404 -0.0006573674 0.006025750 -0.0001939488
## IGE.d -0.0002119599 0.0011256165 0.007472117 -0.0016118821
x <- four.stocks[, 1:3]
y <- four.stocks[, 4]
x$intercept <- rep(1, nrow(x)) # Creating intercept
var_e <- 0.0001 # innovation covariance of observations
sigma_v <- var_e/(1-var_e)*diag(4) # covariance matrix of state
#sigma_v
Ve <- 0.001
P_t <- matrix(rep(0, 16), nrow=4)
P <- matrix(rep(0, 16), nrow=4)
beta <- matrix(rep(0, nrow(y)*4), ncol=4)
y_fitted <- rep(0, nrow(y))
nu <- rep(0, nrow(y))
Q <- rep(0, nrow(y))
# Implementation of function of iterative Non-Gaussian filter operations
res <- kalman_iteration(y,x)
# Extract results
beta <- xts(res[[1]], order.by=index(four.stocks))
plot(beta[2:nrow(beta), 1:3], type='l', main = 'Dynamic updated hedge ratios',
col = c("blue", "green", "red"))
plot(beta[2:nrow(beta), 4], type='l', main = 'Dynamic updated intercept', col = "blue")
# plot trading signals
nu <- xts(res[[4]], order.by=index(four.stocks))
sqrtQ <- xts(sqrt(res[[3]]), order.by=index(four.stocks))
signals <- merge(nu, sqrtQ, -sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
# determining optimal p to maximize Sharpe ratio
p <- seq(0.1, 2, 0.01)
SR<-0
for(j in 1:length(p)){
SR[j] <- SR.train (nu, sqrtQ, p[j])
}
max(na.omit(SR))
## [1] 1.870764
plot(p, SR, type = "l", col = "blue", ylab = "Annualized SR")
## Calcualte the optimal value of p by maximizing the SR
p.opt<-p[which.max(SR)]
p.opt
## [1] 0.31
# create optimal trading signals
p <- p.opt
signals <- merge(nu, p*sqrtQ, -p*sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
plot(signals[3:length(index(signals))], ylab='nu', main = 'Trading signals',
col=c('blue', 'red', 'red'), lwd=c(1,2,2))
## Calculate the cumulative profit using optimal trading signals
# Implementation of profit and loss fucntion to calculate cumulative profit
profit.loss<- PnL(signals,nu,beta,x,y)
plot(cumsum(na.omit(profit.loss)), main="Cumulative profit, $", col = "blue")
sum (na.omit(profit.loss))
## [1] 9045.805
maxx<- nrow (stocks)
GDX <- as.numeric(stocks [maxx, 7]) - as.numeric(stocks [1, 7])
EWA <- as.numeric(stocks [maxx, 8]) - as.numeric(stocks [1, 8])
EWC <- as.numeric(stocks [maxx, 9]) - as.numeric(stocks [1, 9])
IGE <- as.numeric(stocks [maxx, 10]) - as.numeric(stocks [1, 10])
1000*(EWA + EWC)
## [1] 3915.617
1000*(EWA + EWC + IGE)
## [1] 1319.735
1000*(GDX + EWA + EWC + IGE)
## [1] 93.836